#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :收集搜索词.py
# @Time      :2023/10/17 
# @Author    :CL
# @email     :1037654919@qq.com
from utils import mongo_manager, MongoDBUtil


def shouji_words():
    """Collect search words from Zhihu question labels and store them.

    Reads the ``label`` field of every document returned by ``findAll()`` on
    the ``zhihu_question_label_foramt`` collection (``article`` database),
    segments each label with ``jieba.cut``, de-duplicates the resulting
    tokens, drops the tokens ``" "``, ``""`` and ``"."``, and inserts every
    remaining token into ``search_words`` as ``{'_id': token, 'word': token}``.

    Insertion is best-effort: a failing insert is counted and skipped so one
    bad token does not abort the run. A summary is printed at the end when
    any insert failed.

    Returns:
        None. Progress counters and samples are printed to stdout.
    """
    # Imported lazily, as in the original, so merely importing this module
    # does not require jieba.
    import jieba

    # The collection name (including its spelling) is a runtime identifier
    # and is kept exactly as the original code uses it.
    label_source = mongo_manager("zhihu_question_label_foramt", db="article")
    try:
        # Materialise the labels before closing, in case findAll() is lazy.
        word_lists = [seed["label"] for seed in label_source.findAll()]
    finally:
        # Release the handle even when reading the seeds fails.
        label_source.close()
    print(word_lists[:100])
    print(len(word_lists))

    new_words = []
    for label in word_lists:
        new_words.extend(jieba.cut(label))
    print(len(new_words))

    # The previous filter chained `!=` tests with `or`, which is true for
    # every string, so nothing was ever removed. Use a membership test.
    ignored_tokens = {" ", "", "."}
    new_words = [word for word in set(new_words) if word not in ignored_tokens]
    print(len(new_words))
    print(new_words[:100])

    failed = 0
    first_error = None
    for kw in new_words:
        infos = {'_id': kw, 'word': kw}
        try:
            MongoDBUtil.insert_one('search_words', infos)
        except Exception as exc:
            # Presumably most failures are duplicate `_id`s from earlier runs
            # (TODO confirm); keep going, but do not hide them completely and
            # do not swallow KeyboardInterrupt/SystemExit.
            failed += 1
            if first_error is None:
                first_error = exc
    if failed:
        print(f"{failed} of {len(new_words)} inserts failed; first error: {first_error!r}")
if __name__ == "__main__":
    # Script entry point: print one empty line, then run the collection job.
    print("")
    shouji_words()